import time
import pymongo
import requests
from fake_useragent import UserAgent
from lxml import etree

# Scrape the enforcement-information listing pages of bjcourt.gov.cn and
# store every table row as one document in the "执行信息" collection of the
# "北京法院" database (on whatever server a default MongoClient() reaches).

# Seconds to wait for the server before giving up. Without a timeout,
# requests.get() can block forever on a stalled connection.
REQUEST_TIMEOUT = 10

# A row needs at least this many text values to fill "id" .. "address";
# the seventh value ("time") is optional.
MIN_CELLS = 6


def _row_to_record(tds):
    """Map the text values of one table row to a MongoDB document.

    Args:
        tds: Text nodes taken from the row's <td> elements, in document
            order.

    Returns:
        A dict with the keys id, name, type, value, no, address and time,
        or None when the row has fewer than MIN_CELLS values and therefore
        cannot be mapped without an IndexError.
    """
    if len(tds) < MIN_CELLS:
        return None
    return {
        "id": tds[0],
        "name": tds[1].strip(),
        "type": tds[2],
        "value": tds[3],
        "no": tds[4],
        "address": tds[5],
        # The last column may be missing; store an empty string then.
        "time": tds[6] if len(tds) > 6 else "",
    }


ua = UserAgent()

client = pymongo.MongoClient()
try:
    # Every run starts from scratch: results of earlier runs are discarded.
    client.drop_database("北京法院")
    db = client.get_database("北京法院")
    collection = db.get_collection("执行信息")

    for page in range(438, 439):
        url = f"https://www.bjcourt.gov.cn/zxxx/indexOld.htm?st=1&zxxxlx=100013007&bzxrlx=&bzxrxm=&zrr=&frhqtzz=&jbfyId=&ah=&dqxh=26&page={page}"
        headers = {
            # A fresh random User-Agent is picked for every request.
            "User-Agent": ua.random,
            "referer": "https://www.bjcourt.gov.cn/",
        }
        res = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        # Fail loudly on 4xx/5xx instead of parsing an error page as data.
        res.raise_for_status()

        tree = etree.HTML(res.text)
        # Defensive: treat an unparseable body as a page without rows rather
        # than crashing on a None tree.
        # position()>1 skips the first <tr> (presumably the header row).
        items = (
            []
            if tree is None
            else tree.xpath('//table[@class="table_list_02"]/tr[position()>1]')
        )

        datas = []
        for item in items:
            # NOTE(review): text() yields text nodes, not one entry per cell,
            # so a <td> without direct text contributes nothing and later
            # values shift left. The optional "time" handling presumably
            # covers an empty last column -- confirm against the live page.
            tds = item.xpath('./td/text()')
            print(tds, "=========")
            data = _row_to_record(tds)
            if data is None:
                # Row too short to map; skip it instead of aborting the run.
                print(f"第{page}页存在字段不足的行，已跳过")
                continue
            datas.append(data)

        if datas:
            collection.insert_many(datas)
            # Report success only after the insert has actually happened.
            print(f"保存第{page}页成功")
        else:
            # insert_many() rejects an empty list, so skip empty pages.
            print(f"第{page}页没有数据，跳过")

        # Pause between pages (presumably to go easy on the server).
        time.sleep(1)

    print("the end")
finally:
    # Release the MongoDB connection even when a request or insert fails.
    client.close()





